set more off

*set working directory

*cd "...\Source Data\"				

/********************************************************************************
*** Import Sentencing Records											  	  ***
********************************************************************************/

* Load the raw disposition records; the first row of the CSV supplies the
* variable names, and any data already in memory is discarded.
insheet using "california_dispositions.csv", clear names comma

* Blank out the literal "N/A" placeholder in every string column so that
* columns holding only numbers can be converted by destring below.
ds, has(type string)
local string_cols `r(varlist)'
foreach col of varlist `string_cols' {
    quietly replace `col' = "" if `col' == "N/A"
}

* Convert string columns that now contain only numeric text, then remove
* rows that are exact duplicates across all variables.
destring _all, replace
duplicates drop _all, force

*** Add defendant race
* Park the cleaned dispositions in a temporary file, load the arrest records
* into memory as the master data, and attach the dispositions to them.

tempfile dispositions
save `dispositions', replace
insheet using "arrest_records.csv", clear names comma

* 1:m - the key must be unique in the arrest records (master) and may repeat
* in the dispositions (using). The _merge result variable is left in the data.
merge 1:m id county firstname lastname filedate using `dispositions'

/********************************************************************************
***  Generate Key Variables												  	  ***
********************************************************************************/

* Sentencing date: parse the month/day/year string into a Stata daily date.
gen edate = date(dates, "MDY")
format %td edate

* Calendar year; sentences before 2015 are removed. A missing year (date
* could not be parsed) is not "< 2015" in Stata, so those rows are kept.
gen year = year(edate)
drop if year < 2015

* Month and week of the year, plus running group ids for each distinct
* year-month and year-week combination.
gen month = month(edate)
egen year_month = group(year month)
gen week = week(edate)
egen year_week = group(year week)

label variable edate "Sentencing date in Stata time"
label variable dates "Sentencing date"
label variable year "Year of sentence"
label variable month "Month of sentence"
label variable year_month "Year-month of sentence"
label variable week "Week of sentence"
label variable year_week "Year-week of sentence"

* Event-time variables. For each of three reference dates this builds
*   time_X : days elapsed between the sentencing date and the reference date
*   X      : 1 when the sentence falls on or after the reference date, else 0
*
* Stata treats a missing value as larger than any number, so an unguarded
* "edate > mdy(...)" or "time_X >= 0" evaluates to 1 when the sentencing date
* is missing. The indicators are therefore defined once, with ">=", and only
* for rows where the elapsed time is known; rows with a missing sentencing
* date get a missing indicator instead of being counted as post-event.
summ edate
capture drop time

* Petition announcement: 6 June 2016
gen time_a = edate - mdy(6,6,2016)
la var time_a "Elapsed date since petition announcement"

gen announce = time_a >= 0 if !missing(time_a)
la var announce "Post petition announcement indicator"

* Signature certification: 24 January 2018
gen time_s = edate - mdy(1,24,2018)
la var time_s "Elapsed date since signature certification"

gen signatures = time_s >= 0 if !missing(time_s)
la var signatures "Post signature certification indicator"

* Recall election: 5 June 2018
gen time_r = edate - mdy(6,5,2018)
la var time_r "Elapsed date since recall election"

gen recall = time_r >= 0 if !missing(time_r)
la var recall "Post recall election indicator"

***generate fixed effects
* Day of week of the sentencing date (dow() returns 0-6), with one 0/1
* indicator per day. Rows with a missing day get 0 in every indicator.
gen dow = dow(edate)
label variable dow "Day of Week"
forvalues day = 0/6 {
	gen dow_`day' = (dow == `day')
}

* One 0/1 indicator per sentencing year, 2015 through 2019.
forvalues yr = 2015/2019 {
	gen year_`yr' = (year == `yr')
}

* Numeric judge identifier built from the judge's name, and one indicator
* per judge.
encode sentencingjudge, generate(judge_code)
tabulate judge_code, generate(jcode)

* Variable lists for the fixed-effect sets.
* NOTE(review): judgeFEs hard-codes 159 judge indicators - confirm this
* matches the number of categories tabulate creates for the current data.
global dowFEs dow_0 dow_1 dow_2 dow_3 dow_4 dow_5 dow_6
global yearFEs year_2019 year_2018 year_2017 year_2016 year_2015 
global judgeFEs jcode1-jcode159

*sentence days fixes
* Hand-coded sentence lengths for three specific confinement-term strings.
replace sentence_days = 120    if adultconfinementterm == "120 day"
replace sentence_days = 240    if adultconfinementterm == "240 day"
replace sentence_days = 973.44 if adultconfinementterm == "32month"


***generate statute fixed effects
* One indicator per statute code.
* NOTE(review): statFEs hard-codes 409 indicators - confirm this matches the
* number of categories tabulate creates for the current data.
tabulate statute_code, generate(statcode)
global statFEs statcode1-statcode409

***generate judge-level daily sentencing counts
* Number of records sharing the same judge and sentencing date, and its
* log(1 + count) transform. bysort also re-sorts the data by judge and date.
bysort judge_code edate: gen daily_sentencing_workload = _N
gen log_daily_sentencing_workload = ln(1 + daily_sentencing_workload)
label variable daily_sentencing_workload "Daily Caseload by Judge"
label variable log_daily_sentencing_workload "(log) Daily Caseload by Judge"

***label data
* Descriptive labels for the sentence-enhancement columns.
label variable enh117012 "Prior conviction indicator (no sentence enhancement)"
label variable enh113704 "HSC § 11370.4: No Probation, No Suspended Sentence. Enhancement of 3-25 years depending upon the substance weight"
label variable enh23578 "Refusal to take blood test. Sentencing enhancement varies by state."
label variable enh120227 "great bodily harm by accident"
label variable enh113702 "Prior. Seperate three year term for every prior"
label variable enh12022 "Gun Related enhancements"
label variable enh667 "Prior convictions"
label variable enh6678 "Sex enhancements"
label variable enh186 "gang activity"
label variable enh120221 "Secondary (while person is released from custody) crime"
label variable enh11927 "Serious offense"
label variable enh18611 "Freeze amd Seize Law"

* Make every enh* column numeric and treat a missing entry as zero.
destring enh*, replace
foreach flag of varlist enh* {
	replace `flag' = 0 if missing(`flag')
}

* Total across the twelve listed enhancement columns. The columns are
* already numeric at this point, so the destring below changes nothing.
local enh_terms enh117012 enh113704 enh667 enh12022 enh120221 enh113702 enh120227 enh11927 enh18611 enh186 enh23578 enh6678
destring `enh_terms', replace
gen enhancements = 0
foreach term of local enh_terms {
	replace enhancements = enhancements + `term'
}

***cluster level
* One group id per judge x statute combination.
egen cluster_judge_statute = group(judge_code statute_code)

***outcomes
* Sentence length as a share of the statutory maximum. The full variable
* name max_possible_sentence is written out (the same name the rest of this
* script uses) rather than relying on variable abbreviation, which fails
* under "set varabbrev off" or once another max_possible* column exists.
gen sentence_normed_to_statute = sentence_days/max_possible_sentence
la var sentence_normed_to_statute "Sentence/Conviction Charge Severity"

* Same ratio capped at 1. min() ignores missing arguments (min(., 1) is 1),
* so the cap is only applied where the ratio exists; otherwise the result
* stays missing.
gen sentence_normed_truncated = min(sentence_normed_to_statute,1) ///
	if !missing(sentence_normed_to_statute)
la var sentence_normed_truncated "min(Sentence/Conviction Charge Severity,1)"

* Remove the indicator variables generated earlier in this script.
* The wildcard year_* matches the year_2015-year_2019 indicators and also
* year_month and year_week, so those two group ids are dropped here as well.
* After this line the variables named in the globals judgeFEs, statFEs,
* dowFEs and yearFEs no longer exist in the data.
* NOTE(review): confirm that dropping year_week is intended.
drop jcode* statcode* year_mon* dow_* year_* 
*encode statute, gen(statute_code)


/*Generate statute and judge dummy variables*/
* Quietly create one indicator per category: s_1, s_2, ... for statute and
* j_1, j_2, ... for judge.
* NOTE(review): no variable named exactly "judge" is created in this script;
* presumably it comes from the input files or resolves by abbreviation to
* judge_code - confirm.
qui tab statute, gen(s_)
qui tab judge, gen(j_)

/*Generate a unique identifier for county x statute*/
* Built from statute_code as it stands before the recode at the end of this
* section.
egen cluster_county_statute = group(county statute_code)

/*Elapsed filing date in Stata time */
* Parsed from the month/day/year string and displayed as a daily date, the
* same display format used for the sentencing date edate.
gen efiledate = date(filedate,"MDY")
format efiledate %td
la var efiledate "Filing date in Stata time"
la var filedate "Filing date"

* The merge result indicator is no longer needed; capture keeps the script
* running if it is already gone.
capture drop _merge

/*Recode statute_code as group var*/
* Replace statute_code with a consecutive group id, one per distinct value
* of statute.
drop statute_code
egen statute_code = group(statute)


/*Recode case and defendant characteristics*/
* Complements of the two violent-crime flags.
gen nonviolent_crime = 1 - violent_crime
label variable nonviolent_crime "Nonviolent Crime (based on offense group)"

gen nonviolent_crime_667 = 1 - violent_crime_667
label variable nonviolent_crime_667 "Nonviolent Crime (as enumerated in PC 667.5)"

* Race indicators from the arrest record. Each is left missing when the
* recorded race is blank or "American Indian"; otherwise it is 1 for the
* named group and 0 for everyone else.
gen Black    = (arrest_race == "Black")    if !inlist(arrest_race, "", "American Indian")
gen Hispanic = (arrest_race == "Hispanic") if !inlist(arrest_race, "", "American Indian")
gen White    = (arrest_race == "White")    if !inlist(arrest_race, "", "American Indian")

* Asian is the residual category: any remaining recorded race that is not
* Black, Hispanic or White.
gen Asian = !inlist(arrest_race, "Black", "Hispanic", "White") if !inlist(arrest_race, "", "American Indian")

* Order by case number, longest sentence first within each case.
gsort +casenumber -sentence_days

/*Different codings of outcome variables*/
* Sentence relative to the top indictment charge maximum, capped at 1.
* min() ignores missing arguments, so min(., 1) evaluates to 1. Without the
* qualifier below, a case with a missing sentence or a missing or zero
* indictment maximum would be coded as 1. The capped value is therefore only
* computed where the ratio exists, matching how sentence_normed_truncated is
* handled earlier in this script.
gen sentence_normed_to_atc_truncated = min(sentence_days/top_indict_max,1) ///
	if !missing(sentence_days/top_indict_max)
la var sentence_normed_to_atc_truncated "min((Sentence)/(Indictment Charge Severity),1)"

* Uncapped version of the same ratio.
gen sentence_normed_to_atc = sentence_days/top_indict_max
la var sentence_normed_to_atc "(Sentence)/(Indictment Charge Severity)"

* Statutory maximum of the conviction charge relative to the indictment
* charge maximum, and the corresponding reduction (1 minus that ratio).
gen max_over_atcmax = max_possible_sentence/top_indict_max
la var max_over_atcmax "(Conviction Charge Severity)/(Indictment Charge Severity)"

gen charge_bargain = 1-max_over_atcmax
la var charge_bargain "Reduction in Charge Severity"


* Base-10 log of sentence length; missing when sentence_days is zero or
* missing.
gen log10_sentence_days = log10(sentence_days)
la var log10_sentence_days "(log) Sentence Length (in days)"


* Descriptive labels for the variables kept in the final data set.

* Sentence and statutory range
label variable sentence_days "Sentence Length (in days)"
label variable probationterm_cleaned_days "Probation Length (in days)"
label variable max_possible_sentence "Statutory Maximum (in days)"
label variable mid_possible_sentence "Statutory Midpoint (in days)"
label variable min_possible_sentence "Statutory Minimum (in days)"
label variable possible_death_sentence "Charge Carries Death Sentence"

* Charge and case identifiers
label variable statute "Offense code"
label variable id "Source file identifier "
label variable chargenumber "Charge number"
label variable chargeoffensedescription "Description of offense"
label variable chargedate "Date of indictment"
label variable casenumber "Unique case identifier"
label variable court "Court location (if available)"
label variable casejudge "Sitting judge at indictment"
label variable disposition_date "Date of disposition"
label variable plea_guilty "Plead Guilty"

* Defendant and case characteristics
label variable arrest_age_def "Defendant age (based on arrest records)"
label variable arrest_gender_def "Defendant gender (based on arrest records)"
label variable numberofcounts "Number of charges in case"
label variable violent_nonsex_667 "Nonsexual Violent Crime (as enumerated in PC 667.5)"
* Drop columns that are not carried into the saved analysis file.
* This is a single drop command continued across lines with ///, so it
* fails as a whole if any listed name does not exist in the data.
* Besides raw source columns, the list removes variables used or built
* earlier in this script: the individual enh* columns and their total
* (enhancements), arrest_race (after the race indicators were derived from
* it), the name columns used as merge keys (firstname, lastname), and
* adultconfinementterm (after the manual sentence_days fixes).
drop injury_flag force_flag x  item probationstartdate ///  
	probationenddate dlconditionsbacdrugs dlconditionsbaclevel dlconditionsrevocationterm   ///   
	dlconditionssuspensionterm    dlconditionspursuanttovehiclecod ///  
	dlconditionsignitioninterlockdev events  dlconditionstype         dlconditionscomment        ///  
	firstname  lastname   middlename  ///  
	surname   race    levely    enh117012   ///  
	enh113704  enh667 enh12022   enh120221   ///  
	enh113702  enh120227 enh11927   enh18611   ///  
	enh186    name    groupchargecounter        held  ///      
	nojudge   enh23578 enh6678   adultconfinementinlieuof   ///    
	case     minuteorder feecalculationmodifiedfineamount feecalculationadjustedtotalfinea ///  
	pretrial   commitmentstateprison pc859sentencing         preliminaryhearing        ///  
	prepreliminaryhearing      disporeset portal    feecalculationpriors       ///  
	sentencingfelony bailorhearing chargedates xref     ///  
	arrest_name arrest_date_error arrest_lastname arrest_middlename         ///  
	arrest_firstname arrest_middleletter arrest_date_closet    ///           
	 n  enhancements ///
	adultconfinementtermtype adultconfinementstartdate adultconfinementsuspendedreason ///
	adultconfinementconductcredit adultconfinementstarttime adultconfinementstayedreason ///
	commentx probationtype probationterm commenty adultconfinementtype adultconfinementfacility ///
	adultconfinementterm adultconfinementcomment adultconfinementconcurrent adultconfinementsuspended ///
	adultconfinementstayed adultconfinementconsecutive probationcomment conditionjuvenileparties ///
	groupcharge groupcounter levelx casetype casestatus ///
	rawstatute accountedpriors attempted sumcode bcscode hierarchy codetype offensedescription possiblesentence ///
	years months days count severity allegation ///
	sentence_type degree off_lev code_type level case_page top_indict_max_possible top_indict_poss_life_sentence ///
	arrest_race arrest_date arrest_bailamount arrest_black_def arrest_hispanic_def arrest_black_or_hispanic_def ///
	citation chargeid date counter amended rulings sentencing file status ///
	gender adultconfinementbalancetobeserve adultconfinementcreditfortimeser ///
	arrest_topline_charge_code arrest_date1 min_arrest_date_error one_arrest_charge affil county_flag ///
	arrest_topline_max arrest_topline_charge arrest_topline_charge_desc middleletter prefix suffix

	

* Write the cleaned data to recall_data_main in the current working
* directory, overwriting any existing file of that name.
save recall_data_main, replace